Figure. peQTNs



In [1]:

    
import cPickle
import glob
import os
import random
import subprocess

import cdpybio as cpb
from ipyparallel import Client
import matplotlib as mpl
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pybedtools as pbt
from scipy.stats import fisher_exact
import seaborn as sns
import tabix
import vcf as pyvcf
import weblogolib as logo

import cardipspy as cpy
import ciepy

%matplotlib inline
%load_ext rpy2.ipython

dy_name = 'figure_peqtns'

import socket
if socket.gethostname() == 'fl-hn1' or socket.gethostname() == 'fl-hn2':
    dy = os.path.join(ciepy.root, 'sandbox', dy_name)
    cpy.makedir(dy)
    pbt.set_tempdir(dy)
    
outdir = os.path.join(ciepy.root, 'output', dy_name)
cpy.makedir(outdir)

private_outdir = os.path.join(ciepy.root, 'private_output', dy_name)
cpy.makedir(private_outdir)



In [2]:

    
sns.set_style('whitegrid')

Each figure should be able to fit on a single 8.5 x 11 inch page. Please do not send figure panels as individual files. We use three standard widths for figures: 1 column, 85 mm; 1.5 column, 114 mm; and 2 column, 174 mm (the full width of the page). Although your figure size may be reduced in the print journal, please keep these widths in mind. For Previews and other three-column formats, these widths are also applicable, though the width of a single column will be 55 mm.



In [3]:

    
fn = os.path.join(ciepy.root, 'output', 'fine_mapping', 'no_cnv_nmd_vars_gv.tsv')
gv = pd.read_table(fn, index_col=0)

fn = os.path.join(ciepy.root, 'output', 'fine_mapping', 'peqtns.tsv')
peqtns = pd.read_table(fn, index_col=0)

fn = os.path.join(ciepy.root, 'output', 'gwas_analysis', 'pe_no_hla_grasp_counts.tsv')
grasp_counts = pd.read_table(fn, index_col=0)

fn = os.path.join(ciepy.root, 'output', 'gwas_analysis', 'grasp_results.tsv')
grasp_res = pd.read_table(fn, index_col=0)
#grasp_res['phenotype'] = grasp_res.phenotype.apply(lambda x: x.split(' (')[0])

grasp_counts.index = grasp_res.ix[grasp_counts.index, 'phenotype']
grasp_res.index = grasp_res.phenotype
grasp_res = grasp_res.ix[[x for x in grasp_res.index if 'xpression' not in x]]



In [4]:

    
fn = os.path.join(ciepy.root, 'output', 'fine_mapping', 'gene_variants_annotated.pickle')
gene_variant_pairs_annotated = pd.read_pickle(fn)

fn = os.path.join(ciepy.root, 'output', 'fine_mapping', 'tf_disruption.tsv')
tf_disrupt = pd.read_table(fn, index_col=0)

fn = os.path.join(ciepy.root, 'output', 'fine_mapping', 'motif_disruption.tsv')
motif_disrupt = pd.read_table(fn, index_col=0)

gene_info = pd.read_table(cpy.gencode_gene_info, index_col=0)



In [5]:

    
fn = os.path.join(ciepy.root, 'output', 'fine_mapping', 'ctcf_aggregate_counts.tsv')
ctcf_counts = pd.read_table(fn, index_col=0)



In [6]:

    
tf_color = "#DB7093"
dnase_color = "#663399"

legend_colors = [
    np.array((255,0,0)) / 255.,
    np.array((255,105,105)) / 255.,
    np.array((250,202,0)) / 255.,
    np.array((255,252,4)) / 255.,
    np.array((10,190,254)) / 255.,
    np.array((0,176,80)) / 255.,
    np.array((0,176,80)) / 255.,
    np.array((153,255,102)) / 255.,
    np.array((245,245,245)) / 255.,
    ]
ind = [
    'Active promoter',
    'Weak promoter',
    'Strong enhancer',
    'Weak/poised enhancer',
    'Insulator',
    'Transcriptional transition',
    'Transcriptional elongation',
    'Weak transcribed',
    'Heterochromatin',
]
legend_colors = pd.Series(legend_colors, index=ind)

Paper



In [7]:

    
a = pd.Series(gv['q.value_maurano'].values, index=gv.location)
b = pd.Series(peqtns['q.value_maurano'].values, index=peqtns.location)
a = a.drop(b.index)
ana = a.isnull().sum()
asig = sum(a.dropna() < 0.05)
ansig = a.dropna().shape[0] - asig
bna = b.isnull().sum()
bsig = sum(b.dropna() < 0.05)
bnsig = b.dropna().shape[0] - bsig



In [8]:

    
fig = plt.figure(figsize=(4.48, 6.5), dpi=300)

gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax.text(0, 1, 'Figure 3',
        size=16, va='top')
ciepy.clean_axis(ax)
ax.set_xticks([])
ax.set_yticks([])
gs.tight_layout(fig, rect=[0, 0.93, 1, 1])

# peQTN example and legend
# Leave space for it (~3 inches tall)
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ciepy.clean_axis(ax)
rects = []
labels = []
for k in legend_colors.index:
    labels.append(k)
    rects.append(plt.Rectangle((0, 0), 0, 0, fc=legend_colors[k]))
lgd = ax.legend(rects, labels, loc='center', prop={'size':7}, ncol=3)
for p in lgd.get_patches():
    p.set_linewidth(0)
gs.tight_layout(fig, rect=[0, 0.25, 1, 0.31])

# Number of peQTNs
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
vc = peqtns.gene_id.value_counts().value_counts().sort_index()
ind = vc[vc.index > 5].index
vc['>5'] = vc[ind].sum()
vc = vc.drop(ind)
vc.plot(kind='bar', ax=ax, color='lightgrey')
plt.ylabel('Number of eGenes', fontsize=8)
plt.xlabel('Number of peQTNs per gene', fontsize=8)
for tick in ax.get_xticklabels() + ax.get_yticklabels():
    tick.set_fontsize(8)
for tick in ax.get_xticklabels():
    tick.set_rotation(0)
ax.grid(axis='x')
gs.tight_layout(fig, rect=[0.5, 0.7, 1, 0.95])

# Number of DHSs overlapped
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
vc = peqtns.drop_duplicates(subset='location').roadmap_dnase_num.value_counts().sort_index()
vc.index = [int(x) for x in vc.index]
vc.plot(kind='bar', ax=ax, color='lightgrey')
for tick in ax.get_xticklabels() + ax.get_yticklabels():
    tick.set_fontsize(8)
for tick in ax.get_xticklabels():
    tick.set_rotation(0)
ax.grid(axis='x')
ax.set_xlabel('Number of stem cell lines', fontsize=8)
ax.set_ylabel('Number of peQTNs', fontsize=8)
ax.yaxis.set_major_formatter(ciepy.comma_format)
gs.tight_layout(fig, rect=[0, 0.7, 0.5, 0.95])

# CTCF peak counts vs. genotype
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(7)
ax = sns.violinplot(x='genotypes', y='counts', data=ctcf_counts, color='grey',
                    order=[0, 1, 2], scale='count', linewidth=0.5)
sns.regplot(x='genotypes', y='counts', data=ctcf_counts, scatter=False, color='red', 
            ci=None, line_kws={'linewidth':0.8})
ax.set_ylabel('CTCF $\log_{10}$ peak\ncounts $z$-score', fontsize=8)
ax.set_xlabel('Genotype', fontsize=8)
gs.tight_layout(fig, rect=[0, 0, 0.5, 0.25])

t = fig.text(0.005, 0.925, 'A', weight='bold', 
             size=12)
t = fig.text(0.5, 0.925, 'B', weight='bold', 
             size=12)
t = fig.text(0.005, 0.7, 'C', weight='bold', 
             size=12)
t = fig.text(0.005, 0.23, 'D', weight='bold', 
             size=12)

plt.savefig(os.path.join(outdir, 'peqtns_skeleton.pdf'))
#plt.savefig(os.path.join(outdir, 'peqtns_gwas.png'), dpi=300)









    



/frazer01/home/cdeboever/software/anaconda/envs/cie/lib/python2.7/site-packages/matplotlib/gridspec.py:302: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "



In [68]:

    
cols = ['Snail Ref.', 'Snail Alt.',
        'Ref.', 'Alt.']
ciona_res = pd.DataFrame([[98, 0, 0, 2], [94, 0, 0, 0]], columns=cols, 
                         index=['Rep. 1', 'Rep. 2'])



In [109]:

    
fig = plt.figure(figsize=(4.48, 7.75), dpi=300)

gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax.text(0, 1, 'Figure 3',
        size=16, va='top')
ciepy.clean_axis(ax)
ax.set_xticks([])
ax.set_yticks([])
gs.tight_layout(fig, rect=[0, 0.955, 1, 1])

# peQTN example and legend
# Leave space for it (~3 inches tall)
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ciepy.clean_axis(ax)
rects = []
labels = []
for k in legend_colors.index:
    labels.append(k)
    rects.append(plt.Rectangle((0, 0), 0, 0, fc=legend_colors[k]))
lgd = ax.legend(rects, labels, loc='center', prop={'size':7}, ncol=3)
for p in lgd.get_patches():
    p.set_linewidth(0)
gs.tight_layout(fig, rect=[0, 0.36, 1, 0.41])

# Number of peQTNs
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
vc = peqtns.gene_id.value_counts().value_counts().sort_index()
ind = vc[vc.index > 5].index
vc['>5'] = vc[ind].sum()
vc = vc.drop(ind)
vc.plot(kind='bar', ax=ax, color='lightgrey')
plt.ylabel('Number of eGenes', fontsize=8)
plt.xlabel('Number of peQTNs per gene', fontsize=8)
for tick in ax.get_xticklabels() + ax.get_yticklabels():
    tick.set_fontsize(8)
for tick in ax.get_xticklabels():
    tick.set_rotation(0)
ax.grid(axis='x')
gs.tight_layout(fig, rect=[0.5, 0.75, 1, 0.96])

# Number of DHSs overlapped
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
vc = peqtns.drop_duplicates(subset='location').roadmap_dnase_num.value_counts().sort_index()
vc.index = [int(x) for x in vc.index]
vc.plot(kind='bar', ax=ax, color='lightgrey')
for tick in ax.get_xticklabels() + ax.get_yticklabels():
    tick.set_fontsize(8)
for tick in ax.get_xticklabels():
    tick.set_rotation(0)
ax.grid(axis='x')
ax.set_xlabel('Number of stem cell lines', fontsize=8)
ax.set_ylabel('Number of peQTNs', fontsize=8)
ax.yaxis.set_major_formatter(ciepy.comma_format)
gs.tight_layout(fig, rect=[0, 0.75, 0.5, 0.96])

# CTCF peak counts vs. genotype
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(7)
ax = sns.violinplot(x='genotypes', y='counts', data=ctcf_counts, color='grey',
                    order=[0, 1, 2], scale='count', linewidth=0.5)
sns.regplot(x='genotypes', y='counts', data=ctcf_counts, scatter=False, color='red', 
            ci=None, line_kws={'linewidth':0.8})
ax.set_ylabel('CTCF $\log_{10}$ peak\ncounts $z$-score', fontsize=8)
ax.set_xlabel('Genotype', fontsize=8)
gs.tight_layout(fig, rect=[0, 0.16, 0.5, 0.36])

# Ciona results
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax = ciona_res[['Snail Ref.', 'Snail Alt.']].T.plot(ax=ax, kind='bar', color=sns.color_palette()[0:2])
for t in ax.get_xticklabels():
    t.set_rotation(0)
ax.set_ylabel('Percent embryos with\nexpression in Tail Muscle', fontsize=8)
ax.legend(fancybox=True, frameon=True, fontsize=7)
ax.grid(axis='x')
for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(7)
gs.tight_layout(fig, rect=[0.5, 0.16, 1, 0.36])
#gs.tight_layout(fig, rect=[0.4, 0.16, 1, 0.36])

t = fig.text(0.005, 0.93, 'A', weight='bold', 
             size=12)
t = fig.text(0.5, 0.93, 'B', weight='bold', 
             size=12)
t = fig.text(0.005, 0.75, 'C', weight='bold', 
             size=12)
t = fig.text(0.005, 0.34, 'D', weight='bold', 
             size=12)
t = fig.text(0.5, 0.34, 'E', weight='bold', 
             size=12)

plt.savefig(os.path.join(outdir, 'peqtns_skeleton.pdf'))



In [ ]:



In [ ]:



In [ ]:

fig = plt.figure(figsize=(4.48, 8), dpi=300) gs = gridspec.GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) ax.text(0, 1, 'Figure 3', size=16, va='top') ciepy.clean_axis(ax) ax.set_xticks([]) ax.set_yticks([]) gs.tight_layout(fig, rect=[0, 0.92, 1, 1]) # peQTN example and legend # Leave space for it (~3 inches tall) gs = gridspec.GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) ciepy.clean_axis(ax) rects = [] labels = [] for k in legend_colors.index: labels.append(k) rects.append(plt.Rectangle((0, 0), 0, 0, fc=legend_colors[k])) lgd = ax.legend(rects, labels, loc='center', prop={'size':7}, ncol=3) for p in lgd.get_patches(): p.set_linewidth(0) gs.tight_layout(fig, rect=[0, 0.35, 1, 0.4]) # Number of peQTNs gs = gridspec.GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) vc = peqtns.gene_id.value_counts().value_counts().sort_index() ind = vc[vc.index > 5].index vc['>5'] = vc[ind].sum() vc = vc.drop(ind) vc.plot(kind='bar', ax=ax, color='lightgrey') plt.ylabel('Number of eGenes', fontsize=8) plt.xlabel('Number of peQTNs per gene', fontsize=8) for tick in ax.get_xticklabels() + ax.get_yticklabels(): tick.set_fontsize(8) for tick in ax.get_xticklabels(): tick.set_rotation(0) ax.grid(axis='x') gs.tight_layout(fig, rect=[0.5, 0.73, 1, 0.95]) # Number of DHSs overlapped gs = gridspec.GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) vc = peqtns.drop_duplicates(subset='location').roadmap_dnase_num.value_counts().sort_index() vc.index = [int(x) for x in vc.index] vc.plot(kind='bar', ax=ax, color='lightgrey') for tick in ax.get_xticklabels() + ax.get_yticklabels(): tick.set_fontsize(8) for tick in ax.get_xticklabels(): tick.set_rotation(0) ax.grid(axis='x') ax.set_xlabel('Number of stem cell lines', fontsize=8) ax.set_ylabel('Number of peQTNs', fontsize=8) ax.yaxis.set_major_formatter(ciepy.comma_format) gs.tight_layout(fig, rect=[0, 0.73, 0.5, 0.95]) # CTCF peak counts vs. genotype gs = gridspec.GridSpec(1, 1) ax = fig.add_subplot(gs[0, 0]) for t in ax.get_xticklabels() + ax.get_yticklabels(): t.set_fontsize(7) ax = sns.violinplot(x='genotypes', y='counts', data=ctcf_counts, color='grey', order=[0, 1, 2], scale='count', linewidth=0.5) sns.regplot(x='genotypes', y='counts', data=ctcf_counts, scatter=False, color='red', ci=None, line_kws={'linewidth':0.8}) ax.set_ylabel('CTCF peak counts\n$z$-score', fontsize=8) ax.set_xlabel('Genotype', fontsize=8) gs.tight_layout(fig, rect=[0, 0.1, 0.5, 0.35]) t = fig.text(0.005, 0.925, 'A', weight='bold', size=12) t = fig.text(0.5, 0.925, 'B', weight='bold', size=12) t = fig.text(0.005, 0.72, 'C', weight='bold', size=12) t = fig.text(0.005, 0.33, 'D', weight='bold', size=12) plt.savefig(os.path.join(outdir, 'peqtns_skeleton.pdf')) #plt.savefig(os.path.join(outdir, 'peqtns_gwas.png'), dpi=300)

MED30 plot



In [11]:

    
%%R

suppressPackageStartupMessages(library(Gviz))
suppressPackageStartupMessages(library(GenomicFeatures))



In [12]:

    
gene_id = gene_info[gene_info.gene_name == 'MED30'].index[0]

res_fns = glob.glob(os.path.join(ciepy.root, 'private_output', 'run_eqtl_analysis', 'eqtls01', 
                                 'gene_results', '*', 'ENS*.tsv'))
res_fns = pd.Series(res_fns,
                    index=[os.path.splitext(os.path.split(x)[1])[0] for x in res_fns])
res = ciepy.read_emmax_output(res_fns[gene_id])
res = res.sort_values('BEG')
res = res.dropna(subset=['PVALUE'])

grange = res[['BEG']]
grange.columns = ['start']
grange['end'] = grange['start'] + 1
data = pd.DataFrame(-np.log10(res.PVALUE))
data = pd.DataFrame([-np.log10(res.PVALUE), -np.log10(res.PVALUE)],
                    index=['primary', 'primary_sig']).T
t = gene_variant_pairs_annotated[gene_variant_pairs_annotated.gene_id == gene_id]
data.index = res.MARKER_ID
data.ix[t.marker_id, 'primary'] = np.nan
data.ix[set(data.index) - set(t.marker_id), 'primary_sig'] = np.nan

starts = res.BEG

chrom = 'chr8'
start = 118532965 - 5000
end = 118552501 + 5000
fontsize = 8









    



/frazer01/home/cdeboever/software/anaconda/envs/cie/lib/python2.7/site-packages/IPython/kernel/__main__.py:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [13]:

    
data['peqtn'] = np.nan
data.ix['8:118532953_TG/T_rs148584171', 'peqtn'] = data.ix['8:118532953_TG/T_rs148584171', 'primary_sig']
data.ix['8:118532953_TG/T_rs148584171', 'primary_sig'] = np.nan



In [15]:

    
%%R -i data,grange,chrom,start,end,fontsize,starts,tf_color,dnase_color

ideoTrack <- IdeogramTrack(
    genome="hg19", 
    fontsize=fontsize, 
    fontsize.legend=fontsize,
    fontcolor='black', 
    cex=1, 
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1
)

gtrack <- GenomeAxisTrack(
    col="black", 
    cex=1, 
    fontsize=8, 
    col.id="black", 
    fontcolor="black", 
    fontface=1,
    fontface.group=1,
    lwd=1,
)

gr <- GRanges(
    seqnames=chrom, 
    ranges=IRanges(start=starts, width=rep(1, length(starts))),
    primary=data["primary"],
    psig=data["primary_sig"],
    peqtn=data['peqtn']
)

pvalTrack <- DataTrack(
    gr,
    groups=c("Not significant", "Significant", 'peQTN'),
    genome="hg19", 
    type="p", 
    alpha=0.75, 
    lwd=8,
    name="-log10 p-value", 
    fontsize=8,
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=0.5, 
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    fontcolor.title="black",
    fontface.title=1, 
    alpha.title=1,
    cex.legend=1,
    fontcolor.legend="black",
    fontface.legend=1,
    fontsize.legend=8,
)

biomTrack <- BiomartGeneRegionTrack(
    genome="hg19", 
    chromosome=chrom, 
    start=start, 
    end=end,
    name="", 
    fontsize=fontsize,
    collapseTranscripts='meta',
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black', 
    fontcolor.legend="black",
    background.title='transparent', 
    cex=1, 
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1, 
    geneSymbols=TRUE,
    cex.group=1,
    fontcolor.group="black",
    fontface.group=1,
    fontface.title=1, 
    alpha.title=1,
    lwd=0.8,
)

hmmTrack <- UcscTrack(
    track="Broad ChromHMM", 
    table="wgEncodeBroadHmmH1hescHMM",
    genome="hg19", 
    chromosome=chrom,
    from=start, 
    to=end, 
    trackType="AnnotationTrack",
    shape="box",
    start="chromStart",
    end="chromEnd",
    feature="itemRgb", 
    id="name", 
    collapse=FALSE,
    stacking="dense",
    fontsize=7,
    name="chromHMM",
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=1,
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    lwd=0,
    fontface=1, 
    fontface.title=1,
    rotation.title=0
)

feat <- unique(feature(hmmTrack))
featCol <- setNames(as.list(rgb(t(sapply(strsplit(feat, ","),
as.numeric)), maxColorValue=255)), feat)
displayPars(hmmTrack) <- featCol

cebpbTrack <- UcscTrack(
    track="Uniform TFBS", 
    table="wgEncodeAwgTfbsSydhH1hescCebpbIggrabUniPk",
    genome="hg19", 
    chromosome=chrom,
    from=start, 
    to=end, 
    trackType="AnnotationTrack",
    shape="box",
    start="chromStart",
    end="chromEnd",
    feature="itemRgb", 
    id="name", 
    collapse=FALSE,
    stacking="dense",
    fontsize=7,
    name="CEBPB",
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=1,
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    lwd=0,
    fontface=1, 
    fontface.title=1,
    rotation.title=0
)

dnaseTrack <- UcscTrack(
    track="Uniform DNaseI HS", 
    table="wgEncodeAwgDnaseUwdukeH1hescUniPk",
    genome="hg19", 
    chromosome=chrom,
    from=start, 
    to=end, 
    trackType="AnnotationTrack",
    shape="box",
    start="chromStart",
    end="chromEnd",
    feature="itemRgb", 
    id="name", 
    collapse=FALSE,
    stacking="dense",
    fontsize=7,
    name="DHS",
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=1,
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    lwd=0,
    fontface=1, 
    fontface.title=1,
    rotation.title=0
)

cebpbTrack = setPar(cebpbTrack, "fill", tf_color)
dnaseTrack = setPar(dnaseTrack, "fill", dnase_color)









    



/frazer01/home/cdeboever/software/anaconda/envs/cie/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Note that the behaviour of the 'setPar' method has changed. You need to reassign the result to an object for the side effects to happen. Pass-by-reference semantic is no longer supported.

  res = super(Function, self).__call__(*new_args, **new_kwargs)



In [17]:

    
%%R -i fn,start,end

plotTracks(c(gtrack, biomTrack, pvalTrack, cebpbTrack, hmmTrack), chromosome=chrom, from=start, to=end, 
           col.title="black")



In [18]:

    
fn = os.path.join(outdir, 'med30.pdf')



In [19]:

    
%%R -i fn,start,end

pdf(fn, 4.48, 2.75)
plotTracks(c(gtrack, biomTrack, pvalTrack, cebpbTrack,
             hmmTrack), chromosome=chrom, from=start, to=end, 
           col.title="black", sizes=c(1.5, 1.5, 5, 0.5, 0.5))
dev.off()



In [ ]:

    
3 +

Presentation plot



In [15]:

    
%%R

suppressPackageStartupMessages(library(Gviz))
suppressPackageStartupMessages(library(GenomicFeatures))



In [101]:

    
gene_id = gene_info[gene_info.gene_name == 'POU5F1'].index[0]

res_fns = glob.glob(os.path.join(ciepy.root, 'private_output', 'run_eqtl_analysis', 'eqtls01', 
                                 'gene_results', '*', 'ENS*.tsv'))
res_fns = pd.Series(res_fns,
                    index=[os.path.splitext(os.path.split(x)[1])[0] for x in res_fns])
res = ciepy.read_emmax_output(res_fns[gene_id])
res = res.sort_values('BEG')
res = res.dropna(subset=['PVALUE'])

# res_fns = glob.glob(os.path.join(ciepy.root, 'private_output', 'run_eqtl_analysis', 'eqtls02', 
#                                  'gene_results', '*', 'ENS*.tsv'))
# res_fns = pd.Series(res_fns,
#                     index=[os.path.splitext(os.path.split(x)[1])[0] for x in res_fns])
# res2 = ciepy.read_emmax_output(res_fns[gene_id])
# res2 = res2.sort_values('BEG')
# res2 = res2.dropna(subset=['PVALUE'])

grange = res[['BEG']]
grange.columns = ['start']
grange['end'] = grange['start'] + 1
data = pd.DataFrame(-np.log10(res.PVALUE))
data = pd.DataFrame([-np.log10(res.PVALUE), -np.log10(res.PVALUE)],
                    index=['primary', 'primary_sig']).T
t = gene_variant_pairs_annotated[gene_variant_pairs_annotated.gene_id == gene_id]
data.index = res.MARKER_ID
data.ix[t.marker_id, 'primary'] = np.nan
data.ix[set(data.index) - set(t.marker_id), 'primary_sig'] = np.nan
# t = gene_variant_pairs_secondary[gene_variant_pairs_secondary.gene_id == gene_id]
# data.ix[t.marker_id, 'secondary'] = np.nan
# data.ix[set(data.index) - set(t.marker_id), 'secondary_sig'] = np.nan
starts = res.BEG

chrom = 'chr6'
start = 31110081
end = 31164667
fontsize = 8









    



/frazer01/home/cdeboever/software/anaconda/envs/cie/lib/python2.7/site-packages/IPython/kernel/__main__.py:21: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [102]:

    
data.head()









    Out[102]:






  
    
      
      primary
      primary_sig
    
    
      MARKER_ID
      
      
    
  
  
    
      6:30134987_C/T_rs56866777
      1.241391
      NaN
    
    
      6:30135134_T/C_rs117007413
      1.241391
      NaN
    
    
      6:30135198_A/G_rs184969485
      0.000913
      NaN
    
    
      6:30135298_G/C_rs144966950
      1.241391
      NaN
    
    
      6:30135395_C/T_rs116487075
      1.241391
      NaN



In [57]:

    
%%R -i data,grange,chrom,start,end,fontsize,starts,tf_color,dnase_color,hstarts

ideoTrack <- IdeogramTrack(
    genome="hg19", 
    fontsize=fontsize, 
    fontsize.legend=fontsize,
    fontcolor='black', 
    cex=1, 
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1
)

gtrack <- GenomeAxisTrack(
    col="black", 
    cex=1, 
    fontsize=8, 
    col.id="black", 
    fontcolor="black", 
    fontface=1,
    fontface.group=1,
    lwd=1,
)

gr <- GRanges(
    seqnames="chr6", 
    ranges=IRanges(start=starts, width=rep(1, length(starts))),
    primary=data["primary"],
    psig=data["primary_sig"],
)

pvalTrack <- DataTrack(
    gr,
    groups=c("Not significant", "Significant"),
    genome="hg19", 
    type="p", 
    alpha=0.75, 
    lwd=8,
    name="-log10 p-value", 
    fontsize=8,
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=0.5, 
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    fontcolor.title="black",
    fontface.title=1, 
    alpha.title=1,
    cex.legend=1,
    fontcolor.legend="black",
    fontface.legend=1,
    fontsize.legend=8,
)

biomTrack <- BiomartGeneRegionTrack(
    genome="hg19", 
    chromosome=chrom, 
    start=start, 
    end=end,
    name="", 
    fontsize=fontsize,
    collapseTranscripts='meta',
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black', 
    fontcolor.legend="black",
    background.title='transparent', 
    cex=1, 
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1, 
    geneSymbols=TRUE,
    cex.group=1,
    fontcolor.group="black",
    fontface.group=1,
    fontface.title=1, 
    alpha.title=1,
    lwd=0.8,
)

hmmTrack <- UcscTrack(
    track="Broad ChromHMM", 
    table="wgEncodeBroadHmmH1hescHMM",
    genome="hg19", 
    chromosome=chrom,
    from=start, 
    to=end, 
    trackType="AnnotationTrack",
    shape="box",
    start="chromStart",
    end="chromEnd",
    feature="itemRgb", 
    id="name", 
    collapse=FALSE,
    stacking="dense",
    fontsize=7,
    name="chromHMM",
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=1,
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    lwd=0,
    fontface=1, 
    fontface.title=1,
    rotation.title=0
)

feat <- unique(feature(hmmTrack))
featCol <- setNames(as.list(rgb(t(sapply(strsplit(feat, ","),
as.numeric)), maxColorValue=255)), feat)
displayPars(hmmTrack) <- featCol

taf1Track <- UcscTrack(
    track="Uniform TFBS", 
    table="wgEncodeAwgTfbsHaibH1hescTaf1V0416102UniPk",
    genome="hg19", 
    chromosome=chrom,
    from=start, 
    to=end, 
    trackType="AnnotationTrack",
    shape="box",
    start="chromStart",
    end="chromEnd",
    feature="itemRgb", 
    id="name", 
    collapse=FALSE,
    stacking="dense",
    fontsize=7,
    name="TAF1",
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=1,
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    lwd=0,
    fontface=1, 
    fontface.title=1,
    rotation.title=0
)

dnaseTrack <- UcscTrack(
    track="Uniform DNaseI HS", 
    table="wgEncodeAwgDnaseUwdukeH1hescUniPk",
    genome="hg19", 
    chromosome=chrom,
    from=start, 
    to=end, 
    trackType="AnnotationTrack",
    shape="box",
    start="chromStart",
    end="chromEnd",
    feature="itemRgb", 
    id="name", 
    collapse=FALSE,
    stacking="dense",
    fontsize=7,
    name="DHS",
    fontcolor.legend='black', 
    col.axis='black', 
    col.title='black',
    background.title='transparent', 
    cex=1,
    cex.id=1, 
    cex.axis=1, 
    cex.title=1,
    fontface=1, 
    fontface.title=1,
    lwd=0,
    fontface=1, 
    fontface.title=1,
    rotation.title=0
)

hgr <- GRanges(
    seqnames="chr6", 
    ranges=IRanges(start=hstarts, width=rep(1, length(hstarts))),
    )

pvalHT <- HighlightTrack(
    trackList=pvalTrack, 
    range=hgr,
    alpha=0.5,
    fill=NA)

taf1Track = setPar(taf1Track, "fill", tf_color)
dnaseTrack = setPar(dnaseTrack, "fill", dnase_color)



In [58]:

    
fn = os.path.join(outdir, 'pou5f1.pdf')



In [59]:

    
%%R -i fn,start,end

pdf(fn, 4.48, 2.75)
plotTracks(c(gtrack, biomTrack, pvalHT, taf1Track,
             hmmTrack), chromosome="chr6", from=start, to=end, 
           col.title="black", sizes=c(1.5, 1.5, 5, 0.5, 0.5))
dev.off()



In [ ]:

	primary	primary_sig
MARKER_ID
6:30134987_C/T_rs56866777	1.241391	NaN
6:30135134_T/C_rs117007413	1.241391	NaN
6:30135198_A/G_rs184969485	0.000913	NaN
6:30135298_G/C_rs144966950	1.241391	NaN
6:30135395_C/T_rs116487075	1.241391	NaN